* This Stata dofile is one of two that is written to accompany the paper:
* J Gans & A Leigh, 'Born on the First of July: An (Un)natural Experiment in Birth Timing' (2009) Journal of Public Economics, 93(1-2): 246-263 
* Feel free to use or adapt it, but please cite that paper.
* Questions to andrew_leigh@ksg02.harvard.edu

* Note that we are not permitted to share the data from this paper. Other users can only obtain the data by contacting 
* the state & territory births registrars, via the Australian Bureau of Statistics. This process took us several months.
* Note that there may be vintage effects in the data, since births are sometimes registered with several years' delay.

version 10

**********************************************************
* Creating a dataset of holidays
**********************************************************
clear
set more off
cd "C:\Users\Andrew\My publications\Aust - baby bonus bunching\"
gen date=.
set obs 366
replace date=_n+16070
format date %d
for any month day dow: gen X=X(date)
gen qb=1 if dow==1 & month==6 & day>=8 & day<=14
recode qb .=0
gen holiday=0
replace holiday=1 if (day==1 & month==1) | (day==25 & month==12) | (day==26 & month==12) | (day==26 & month==1) | (day==25 & month==4)
sort date
merge date using easter_dates.dta, nokeep
tsset date
replace holiday=1 if f2.easter==1 | l.easter==1
replace holiday=1 if qb==1
ren date bdate
drop datestr month day dow qb easter _merge
sort bdate
save holidays_2004, replace

*******************************************************
* Detailed deaths analysis
* New South Wales, QLD, WA, Tasmania, Northern Territory only 
* The sample is infant deaths aged<1 month
*******************************************************
clear
set more off
cd "C:\Users\Andrew\Datasets\ABS Births Data\"
#delimit ;
infix 
reg_yy 1-4 
reg_mm 5-6 
reg_state 7-7 
reg_no 8-13 
dth_dd 14-15 
dth_mm 16-17 
dth_yy 18-21 
dth_age 22-24 
dures_yy 25-26 
sex 27-27 
m_status 28-28 
ures_9 29-37 
bpl90 38-41 
occ 42-45 
str cod 46-49 
certifier 50-50 
issue 51-52 
indigenous 53-53  
bth_dd 54-55 
bth_mm 56-57 
bth_yy 58-61  
postcode 62-65  
cod_qery 66-66 
post_mort 67-67  
str entity_axis_data 68-259 
str record_axis_data 260-339 
record_axis_count 340-342 
using 2004_cod.txt;
#delimit cr

recode bth_dd 99=.
recode bth_mm 99=.
keep if bth_yy==2004
gen bdate= mdy(bth_mm,bth_dd,bth_yy)
format %d bdate
gen doy=doy(bdate)
gen dow=dow(bdate)

* Keep only dathes with ICD-10 code P, which is "P: ICD-10 Chapter XVI: Certain conditions originating in the perinatal period"
keep if substr(cod,1,1)=="P"

* Saving temp file (for deaths rate - strategy 2 below)
save "C:\Users\Andrew\My publications\Aust - baby bonus bunching\temp_ideaths2", replace

tab reg_state
bysort bdate: egen deaths_sum=count(sex) 
egen tag=tag(bdate)
keep if tag

* Saving temp file (for deaths rate - strategy 1 below)
sort bdate
save "C:\Users\Andrew\My publications\Aust - baby bonus bunching\temp_ideaths1", replace

*******************************************************
* Detailed births analysis
* Victoria, Western Australia, and ACT only 
*******************************************************
clear
set mem 50m
set more off
cd "C:\Users\Andrew\Datasets\ABS Births Data\"
#delimit ;
infix 
year_of_registration 1-4 
month_of_registration 5-6 
state_of_registration 7-7 
registration_district 8-9 
registration_number 10-15 
year_of_birth 16-19 
month_of_birth 20-21 
day_of_birth 22-23 
confinement_flag 24-24 
multiple_birth_flag 25-25 
sex 26-26 
nuptiality 27-27 
plurality 28-29 
liveborn 30-30 
childs_indigenous_status 31-31 
hospital 32-35 
place_of_birth_postcode 36-40 
birthweight 41-44 
mothers_age 45-47 
usual_residence_5_digit 48-52 
usual_residence_9_digit 53-61 
mothers_birthplace 62-65 
mothers_indigenous_stat 66-66 
fathers_age 67-69 
fathers_birthplace 70-73 
fathers_indigenous_stat 74-74 
previous_issue 75-76 
birth_interval_years 77-78 
birth_interval_months 79-80 
dur_of_marr_yrs 81-82 
dur_of_marr_mths 83-84 
dur_of_marr_days 85-86 
marriage_date_year 87-90 
marriage_date_month 91-92 
marriage_date_day 93-94 
marriage_place 95-98 
fathers_occupation 99-102 
issue_male 103-104 
issue_female 105-106 
dur_gestation_wks 107-108 
extra_reg_year 109-112 
bd_reg_yy 113-116 
bd_reg_dist 117-118 
bd_reg_no 119-124 
using 2004_bth.txt;
#delimit cr

keep if year_of_birth==2004
gen bdate= mdy(month_of_birth,day_of_birth,year_of_birth)
format %d bdate
gen doy=doy(bdate)
gen dow=dow(bdate)
recode mothers_age 999=.
recode fathers_age 888=. 999=.

tab state
bysort state: sum mothers_age fathers_age birthweight

* Saving temp file (for deaths rate - strategy 2 below)
save "C:\Users\Andrew\My publications\Aust - baby bonus bunching\temp_births2", replace

ren place_of_birth_postcode postcode
recode postcode 0=. 9999=. 99999=.
sort postcode
merge postcode using pcode_data_2001, nokeep keep(meanwkinc overseasborn)
tab _merge
drop _merge

sort hospital
merge hospital using wa_hospital_codes, nokeep keep(establishment type)
egen tag=tag(hospital)
list hospital establishment state if tag
drop tag
tab _merge
drop _merge

* Creating new variables
gen temp_teen=1 if mothers_age<20
recode temp_teen .=0 if mothers_age~=.
gen teenmom=temp_teen
gen mage_mean=mothers_age
gen fage_mean=fathers_age
gen page_mean=(mage_mean+fage_mean)/2
replace page_mean=mage_mean if page_mean==.
bysort bdate: egen births_sum=count(year_of_birth) 
gen inc_mean=ln(meanwkinc)
recode birthweight 0=. 9999=.
gen bweight=birthweight
gen temp1=birthweight
recode temp1 min/2499=1 2501/7000=0
gen temp2=birthweight
recode temp2 min/4000=0 4001/7000=1
gen lbw=temp1
gen hbw=temp2
drop temp*
recode nuptiality 0=1 1=0 2=1
gen unmarr=nuptiality
recode childs_indigenous_status 4/6=1 7=.
gen indig=childs_indigenous_status
gen temp1=fathers_occupation
recode temp1 3100=1 3400=1 0=. 9800/9999=. *=0
gen drdad=temp1
gen temp2=fathers_occupation
recode temp2 1000/2900=1 0=. 9800/9999=. *=0
gen profdad=temp2
gen temp3=1 if type=="Teaching"
recode temp3 .=0 if type~=""
gen tchhosp=temp3
drop temp*

* Regressions for various demographics
gen window7=1 if (month_of_birth==6 & day_of_birth>=24) | (month_of_birth==7 & day_of_birth<=7)
gen window14=1 if (month_of_birth==6 & day_of_birth>=17) | (month_of_birth==7 & day_of_birth<=14)
gen window21=1 if (month_of_birth==6 & day_of_birth>=10) | (month_of_birth==7 & day_of_birth<=21)
gen window28=1 if (month_of_birth==6 & day_of_birth>=3) | (month_of_birth==7 & day_of_birth<=28)
gen babybonus=0 if month_of_birth<=6
replace babybonus=1 if month_of_birth>=7
cd "C:\Users\Andrew\My publications\Aust - baby bonus bunching"

sort bdate
merge bdate using holidays_2004, nokeep
drop _merge

gen b=ln(births_sum)
for num 7: xi: reg b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster replace bdec(3) se ct("log births - X day window") addstat("Moved",_b[babybonus]*X/2) adec(0)
for num 14 21 28: xi: reg b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("log births - X day window") addstat("Moved",_b[babybonus]*X/2) adec(0)
replace b=mage_mean
for num 7 14 21 28: xi: reg b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("mother's age - X day window")
replace b=fage_mean
for num 7 14 21 28: xi: reg b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("father's age - X day window")  
replace b=page_mean
for num 7 14 21 28: xi: reg b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("av. parental age - X day window")
replace b=teenmom
for num 7 14 21 28: xi: dprobit b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("teen mom - X day window") addstat("Pseudo R2",e(r2_p)) adec(3)
replace b=inc_mean
for num 7 14 21 28: xi: reg b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("mean income - X day window")
replace b=bweight 
for num 7 14 21 28: xi: reg b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("birthweight - X day window")
replace b=lbw 
for num 7 14 21 28: xi: dprobit b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("low birthweight - X day window") addstat("Pseudo R2",e(r2_p)) adec(3)
replace b=hbw 
for num 7 14 21 28: xi: dprobit b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("high birthweight - X day window") addstat("Pseudo R2",e(r2_p)) adec(3)
replace b=multiple 
for num 7 14 21 28: xi: dprobit b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("multiple birth - X day window") addstat("Pseudo R2",e(r2_p)) adec(3)
replace b=unmarr 
for num 7 14 21 28: xi: reg b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("unmarried - X day window")
replace b=indig 
for num 7 14 21 28: xi: reg b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Indigenous - X day window")


* Calculating share HBW (before collapse, so it's weighted by number of births)
gen weekgroup=1 if (month_of_birth==6 & day_of_birth>=3) 
replace weekgroup=2 if (month_of_birth==6 & day_of_birth>=10) 
replace weekgroup=3 if (month_of_birth==6 & day_of_birth>=17) 
replace weekgroup=4 if (month_of_birth==6 & day_of_birth>=24) 
replace weekgroup=5 if (month_of_birth==7 & day_of_birth<=28)
replace weekgroup=6 if (month_of_birth==7 & day_of_birth<=21)
replace weekgroup=7 if (month_of_birth==7 & day_of_birth<=14)
replace weekgroup=8 if (month_of_birth==7 & day_of_birth<=7)
for num 1/8: egen hbw_meanX=mean(hbw) if weekgroup==X
sum hbw*

* Collapsing to one observation per day
collapse babybonus teenmom tchhosp drdad profdad indig unmarr lbw hbw hbw_mean* bweight inc_mean page_mean fage_mean mage_mean births_sum doy dow month_of_birth day_of_birth holiday, by(bdate)

* Taking out day-of-week effect
for any teenmom tchhosp drdad profdad indig unmarr lbw hbw bweight inc_mean page_mean fage_mean mage_mean births_sum: xi: reg X holiday i.dow if doy<155 | doy>210 \ predict temp1 \ egen temp2=mean(temp1) \ replace temp1=temp1-temp2 \ gen X_sa=X-temp1 \ drop temp* \ tabstat X X_sa,by(dow)

* High birthweight graph 
set scheme s2mono
gen bb_period=0
replace bb_period=1 if (month_of_birth==6 | month_of_birth==7) 

twoway line hbw doy if doy>=155 & doy<=210 || line hbw_mean1-hbw_mean8 doy if doy>=155 & doy<=210, lwidth(thick ..) lpattern(solid ..) xline(182.5) ylab(.05 .1 .15) xtitle("") yti("") xlabel(155 "June 3" 162 "June 10" 169 "June 17" 176 "June 24" 183 "July 1" 189 "July 7" 196 "July 14" 203 "July 21" 210 "July 28",angle(vertical)) legend(off) subti("Solid horizontal lines show weekly means")  ti("Figure 6: Proportion of Births That Are High Birth Weight") note("Note: Based on 2004 births data from the Australian Capital Territory, Victoria, and Western Australia." "High birth weight share does not vary systematically across the day of the week.") 
sum hbw
tabstat hbw, by(bb_period)
tabstat hbw if doy>=155 & doy<=210,by(month_of_birth)
gen week=int(((doy-183)/7)+27)
list bdate week if doy>=155 & doy<=210
bysort week: egen hbw_week=mean(hbw)
list hbw_week if bdate==d(01jul2004)
sum hbw_week,d

****************************************************
* Combining births & deaths data
* Strategy 1: Combining All births with NSW, QLD, WA, Tas & NT deaths
****************************************************

clear
set more off
set mem 50m
set matsize 800
cd "C:\Users\Andrew\My publications\Aust - baby bonus bunching\"
use births_by_day_2006.dta,clear
renpfix _ births
reshape long births,i(month day) j(year)
ren month monthname
gen month=.
for num 1/12 \ any January February March April May June July August September October November December: replace month=X if monthname=="Y"
gen bdate=mdy(month,day,year)
gen doy=doy(bdate)
gen dow=dow(bdate)
format bdate %d
sort bdate
merge bdate using temp_ideaths1, keep(deaths_sum) nokeep
tab _merge
drop _merge
keep if year==2004 & month<=10
recode deaths_sum .=0
gen death_rate=1000*(deaths_sum/(births*0.67))

* Regressions for various demographics
gen window7=1 if (month==6 & day>=24) | (month==7 & day<=7)
gen window14=1 if (month==6 & day>=17) | (month==7 & day<=14)
gen window21=1 if (month==6 & day>=10) | (month==7 & day<=21)
gen window28=1 if (month==6 & day>=3) | (month==7 & day<=28)
gen babybonus=0 if month<=6
replace babybonus=1 if month>=7
cd "C:\Users\Andrew\My publications\Aust - baby bonus bunching"
sort bdate
merge bdate using holidays_2004, nokeep
drop _merge

gen b=death_rate
for num 7 14 21 28: xi: reg b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("Five state death rate - X day window")

****************************************************
* Combining births & deaths data
* Strategy 2: Combining WA births with WA deaths
****************************************************
cd "C:\Users\Andrew\My publications\Aust - baby bonus bunching\"
use temp_ideaths2, clear
keep if reg_state==5
bysort bdate: egen deaths_sum=count(sex) 
egen tag=tag(bdate)
keep if tag
keep bdate deaths_sum
sort bdate
save ideaths3, replace

use temp_births2, clear
keep if state==5
bysort bdate: egen births_sum=count(year_of_birth) 
egen tag=tag(bdate)
keep if tag
keep bdate births_sum
format bdate %d
gen doy=doy(bdate)
gen dow=dow(bdate)
gen month=month(bdate)
gen day=day(bdate)
gen year=year(bdate)
sort bdate
merge bdate using ideaths3
tab _merge
drop _merge
keep if year==2004 & month<=10
recode deaths_sum .=0
recode births .=0
gen logbirths=ln(births)
gen death_rate=1000*(deaths_sum/births)

* Regressions for various demographics
gen window7=1 if (month==6 & day>=24) | (month==7 & day<=7)
gen window14=1 if (month==6 & day>=17) | (month==7 & day<=14)
gen window21=1 if (month==6 & day>=10) | (month==7 & day<=21)
gen window28=1 if (month==6 & day>=3) | (month==7 & day<=28)
gen babybonus=0 if month<=6
replace babybonus=1 if month>=7
cd "C:\Users\Andrew\My publications\Aust - baby bonus bunching"
sort bdate
merge bdate using holidays_2004, nokeep
drop _merge

gen b=death_rate
for num 7 14 21 28: xi: reg b babybonus holiday i.dow if windowX==1, cl(bdate) \ outreg using results_babybonus_demographics.doc, coefastr nocons bracket 3aster append bdec(3) se ct("WA death rate - X day window")
